# ==============================================================================
# file name: 03-identify-repeated-participation-FB.sql
# date: Mar 12, 2024
# author: Bernhard Clemm 
# purpose: identify attempts at repeated participation (Facebook)
# THIS SCRIPT REQUIRES ACCESS TO THE RAW DATA AND SERVES FOR REFERENCE ONLY
# ==============================================================================

SELECT 
v.person_id, v.wave, v.created_utc, v.duration_s_5_na, v.same_as_prev_url, v.url,
CASE 
WHEN (v.url_host LIKE '%questionpro.com' AND REGEXP_LIKE(v.url, '/a/TakeSurvey\?tt=[a-zA-Z0-9]')) 
THEN CONCAT(v.url_protocol, '//', v.url_host, v.url_path, '?', REGEXP_EXTRACT(v.url_query, '.+?(?=\&)'))
ELSE CONCAT(v.url_protocol, '//', v.url_host, v.url_path) 
END AS url_quest
FROM 
(SELECT *, CASE WHEN url = url_previous THEN 1 ELSE 0 END same_as_prev_url -- indicator for whether visit happening directly after each other.
FROM
(SELECT w1id AS person_id, wave, created_utc, duration_s_5_na, url, 
URL_EXTRACT_PROTOCOL(url) url_protocol, 
URL_EXTRACT_HOST(url) url_host, 
URL_EXTRACT_PATH(url) url_path, 
URL_EXTRACT_QUERY(url) url_query, 
LAG(url) OVER(PARTITION BY w1id ORDER BY created_utc) AS url_previous
FROM visits_professionals
ORDER BY w1id, created_utc)) v
WHERE 
(v.url_host LIKE '%confirmit.com' AND REGEXP_LIKE(v.url_path, '^/wix/[a-zA-Z0-9]')) OR 
(v.url_host LIKE '%surveygizmo.com' AND REGEXP_LIKE(v.url_path, '^/s3/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%surveygizmo.eu' AND REGEXP_LIKE(v.url_path, '^/s3/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%surveymonkey.com' AND REGEXP_LIKE(v.url_path, '^/r/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%qualtrics.com' AND REGEXP_LIKE(v.url_path, '^/jfe/form/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%survey.cmix.com' AND REGEXP_LIKE(v.url_path, '^/[A-Z0-9]')) OR
(v.url_host LIKE '%questionpro.com' AND REGEXP_LIKE(v.url_path, '/t/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%questionpro.com' AND REGEXP_LIKE(v.url, '/a/TakeSurvey\?tt=[a-zA-Z0-9]')) OR
(v.url_host LIKE '%formsite.com' AND REGEXP_LIKE(v.url_path, '[a-zA-Z0-9]/index\.html$')) OR 
(v.url_host LIKE '%unipark.de' AND REGEXP_LIKE(v.url_path, '^/uc/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%typeform.com' AND REGEXP_LIKE(v.url_path, '^/to/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%formstack.com' AND REGEXP_LIKE(v.url_path, '^/forms/(?![a-zA-Z0-9]*index\.php)')) OR
(v.url_host LIKE '%zohopublic.com' AND REGEXP_LIKE(v.url_path, '^/zs/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%zohopublic.eu' AND REGEXP_LIKE(v.url_path, '^/zs/[a-zA-Z0-9]'))

-- exported as repeated_participation_FB.csv (not not contained in reproduction repository as raw data)